* Andrii Parkhomenko
* Project: Urban Structure When Working from Home, with Matt Delventhal
* wagebrackets_pwpuma.do
* Purpose: construct PWPUMA-level wage averages for each wage bin in CTPP
********************************************************************************
********************************************************************************

* ------------------------------------------------------------------------------
* Initial settings, read data, create variables

clear all

cd "D:\Dropbox\My research\Projects\Urban Structure When Working from Home\Data\data_in\Wages"
local dir_use D:\Data for research\IPUMS

use "`dir_use'\\usa_00156.dta"

local incometype incearn

if "`incometype'"=="incearn" {
	gen wageyr = incwage + incbus00
	}
if "`incometype'"=="incwage" {
	gen wageyr = incwage
	}

egen loc_work = concat(pwstate2 pwpuma00), punct("_")

* ------------------------------------------------------------------------------
* Construct hourly wages

gen hours = .
replace hours = uhrswork if uhrswork>0

gen weeks = .
replace weeks = 27+(39-27)/2 if wkswork2==3
replace weeks = 40+(47-40)/2 if wkswork2==4
replace weeks = 48+(49-48)/2 if wkswork2==5
replace weeks = 50+(52-50)/2 if wkswork2==6

gen hoursyr = hours*weeks if hours!=. & weeks!=. 
gen wagehr = wageyr / hoursyr if hours!=. & weeks!=.

* ------------------------------------------------------------------------------
* Clean data for calculating wages

gen labor_smpl = 1

replace labor_smpl = 0 if gq!=1													// drop observations in group quarters
*replace labor_smpl = 0 if age<18												// drop observations younger than 18 years old
replace labor_smpl = 0 if empstat!=1											// keep observations who report being employed
replace labor_smpl = 0 if wageyr==999999										// drop observations with absent info on wage income
replace labor_smpl = 0 if wageyr<=0 | inctot<=0									// drop observations with negative wage or total income
*replace labor_smpl = 0 if (ind1990>=900 | occ1990>=905 | occ1990==3)  			// drop government employees and military
*replace labor_smpl = 0 if uhrswork<35  											// keep those who worked 35 hours or more last week
*replace labor_smpl = 0 if WKSWORK2<3  											// keep those who worked more than 26 weeks last year
*replace labor_smpl = 0 if (wagehr<5.15/2 & (multyear==2006 | multyear==2007)) | /// drop those who earn less than 1/2 minimum federal wage
*	(wagehr<5.85/2 & multyear==2008) | (wagehr<6.55/2 & multyear==2009) | (wagehr<7.25/2 & multyear>=2010)

keep if labor_smpl==1

drop if pwstate2==0 | pwstate2==2 | pwstate2==15 | pwstate2>56 					// keep the 48 contiguous states and D.C.

* ------------------------------------------------------------------------------
* Construct NAICS industries

gen indnaicsgroup = substr(indnaics,1,2)
replace indnaicsgroup = "33" if indnaics=="3MS"
replace indnaicsgroup = "45" if indnaics=="4M" | indnaics=="4MS"
destring indnaicsgroup, replace

gen industry = .
replace industry = 1 if indnaicsgroup==11 | indnaicsgroup==21 
replace industry = 2 if indnaicsgroup==23
replace industry = 3 if indnaicsgroup==31 | indnaicsgroup==32 | indnaicsgroup==33
replace industry = 4 if indnaicsgroup==42
replace industry = 5 if indnaicsgroup==44 | indnaicsgroup==45 
replace industry = 6 if indnaicsgroup==48 | indnaicsgroup==49 | indnaicsgroup==22 
replace industry = 7 if indnaicsgroup==51
replace industry = 8 if indnaicsgroup==52 | indnaicsgroup==53 
replace industry = 9 if indnaicsgroup==54 | indnaicsgroup==55 | indnaicsgroup==56 
replace industry = 10 if indnaicsgroup==61 | indnaicsgroup==62 
replace industry = 11 if indnaicsgroup==71 | indnaicsgroup==72 
replace industry = 12 if indnaicsgroup==81
replace industry = 13 if indnaicsgroup==92 & ~(ind1990>=940 & ind1990<=960)
replace industry = 14 if indnaicsgroup==92 & (ind1990>=940 & ind1990<=960)

label define industrylab 1 "Agriculture, forestry, fishing and hunting, and mining" ///
						 2 "Construction" ///
						 3 "Manufacturing" ///
						 4 "Wholesale trade" ///						 
						 5 "Retail trade" ///
						 6 "Transportation and warehousing, and utilities" ///
						 7 "Information" ///	
						 8 "Finance, insurance, real estate and rental and leasing" ///
						 9 "Professional, scientific, management, administrative,  and waste management services" ///
						 10 "Educational, health and social services" ///	
						 11 "Arts, entertainment, recreation, accommodation and food services" ///
						 12 "Other services (except public administration)" ///
						 13 "Public administration" ///
						 14 "Armed forces"
label values industry industrylab  
						 
* ------------------------------------------------------------------------------
* Create control variables

* Race and sex
gen male = 0
replace male = 1 if sex==1
gen white = 0
replace white = 1 if race==1 & hispan==0

* Education 
gen college = 0 
replace college = 1 if educ>=10

* Age
gen agegroup = .
replace agegroup = 22 if age<25
replace agegroup = 27 if age>=25 & age<30
replace agegroup = 32 if age>=30 & age<35
replace agegroup = 37 if age>=35 & age<40
replace agegroup = 42 if age>=40 & age<45
replace agegroup = 47 if age>=45 & age<50
replace agegroup = 52 if age>=50 & age<55
replace agegroup = 57 if age>=55 & age<60
replace agegroup = 62 if age>=60
qui tab agegroup, gen(Dage)
drop Dage1

* Industry
forvalues di = 1/13 {  // industry dummies; see https://usa.ipums.org/usa-action/variables/IND1990#codes_section
	gen Dind`di' = 0
	}
replace Dind1 = 1 if (ind1990>=10 & ind1990<=32)    // AGRICULTURE, FORESTRY, AND FISHERIES
replace Dind2 = 1 if (ind1990>=40 & ind1990<=50)    // MINING
replace Dind3 = 1 if (ind1990==60)  		        // CONSTRUCTION
replace Dind4 = 1 if (ind1990>=100 & ind1990<=222)  // MANUFACTURING Nondurable Goods
replace Dind5 = 1 if (ind1990>=230 & ind1990<=392)  // MANUFACTURING Durable Goods
replace Dind6 = 1 if (ind1990>=400 & ind1990<=472)  // TRANSPORTATION, COMMUNICATIONS, AND OTHER PUBLIC UTILITIES
replace Dind7 = 1 if (ind1990>=500 & ind1990<=571)  // WHOLESALE TRADE
replace Dind8 = 1 if (ind1990>=580 & ind1990<=691)  // RETAIL TRADE
replace Dind9 = 1 if (ind1990>=700 & ind1990<=712)  // FINANCE, INSURANCE, AND REAL ESTATE
replace Dind10 = 1 if (ind1990>=721 & ind1990<=760) // BUSINESS AND REPAIR SERVICES
replace Dind11 = 1 if (ind1990>=761 & ind1990<=791) // PERSONAL SERVICES
replace Dind12 = 1 if (ind1990>=800 & ind1990<=810) // ENTERTAINMENT AND RECREATION SERVICES
replace Dind13 = 1 if (ind1990>=812 & ind1990<=893) // PROFESSIONAL AND RELATED SERVICE
drop Dind1  // base category

* Occupation
forvalues di = 1/16 {  // occupation dummies; see https://usa.ipums.org/usa-action/variables/OCC1990#codes_section
	gen Docc`di' = 0
	}
replace Docc1 = 1 if (occ1990>=3 & occ1990<=22)     // Executive, Administrative, and Managerial Occupations
replace Docc2 = 1 if (occ1990>=23 & occ1990<=37)    // Management Related Occupations
replace Docc3 = 1 if (occ1990>=43 & occ1990<=200)   // Professional Specialty Occupations
replace Docc4 = 1 if (occ1990>=203 & occ1990<=235)  // Technicians and Related Support Occupations
replace Docc5 = 1 if (occ1990>=243 & occ1990<=283)  // Sales Occupations
replace Docc6 = 1 if (occ1990>=303 & occ1990<=389)  // Administrative Support Occupations, Including Clerical Supervisors, Administrative Support Occupations
replace Docc7 = 1 if (occ1990>=405 & occ1990<=407)  // Private Household Occupations
replace Docc8 = 1 if (occ1990>=415 & occ1990<=427)  // Protective Service Occupations
replace Docc9 = 1 if (occ1990>=434 & occ1990<=469)  // Service Occupations, Except Protective and Household Food Preparation and Service Occupations
replace Docc10 = 1 if (occ1990>=473 & occ1990<=498) // FARMING, FORESTRY, AND FISHING OCCUPATIONS
replace Docc11 = 1 if (occ1990>=503 & occ1990<=549) // Mechanics and Repairers
replace Docc12 = 1 if (occ1990>=558 & occ1990<=599) // Construction Trades
replace Docc13 = 1 if (occ1990>=614 & occ1990<=617) // Extractive Occupations
replace Docc14 = 1 if (occ1990>=628 & occ1990<=699) // Precision Production Occupations
replace Docc15 = 1 if (occ1990>=703 & occ1990<=799) // Machine Operators, Assemblers, and Inspectors
replace Docc16 = 1 if (occ1990>=803 & occ1990<=889) // Transportation and Material Moving Occupations
drop Docc10  // base category

* ------------------------------------------------------------------------------
* Estimate adjusted wages

gen logwageyr = log(wageyr)

reg logwageyr male white Dind* Docc* Dage* college [w=perwt]
predict wresid, residuals
local constant _b[_cons]
gen logwageyradj = `constant' + wresid

gen wageyradj = exp(logwageyradj)
drop wresid

* ------------------------------------------------------------------------------
* Calculate PWPUMA bin averages by industry

foreach w in wageyr wageyradj {

	gen `w'_perwt = `w'*perwt

	local wb0 = 0

	foreach wb1 in 10000 15000 25000 35000 50000 65000 75000 100000 { 

		bys loc_work year industry: egen mean2_`w'_`wb0'_`wb1' = sum(`w'_perwt) if wageyr>=`wb0' & wageyr<`wb1'
		bys loc_work year industry: egen sum_perwt_`w'_`wb0'_`wb1' = sum(perwt) if wageyr>=`wb0' & wageyr<`wb1'
		replace mean2_`w'_`wb0'_`wb1' = mean2_`w'_`wb0'_`wb1' / sum_perwt_`w'_`wb0'_`wb1'
		bys loc_work year industry: egen mean_`w'_`wb0'_`wb1' = mean(mean2_`w'_`wb0'_`wb1')
		
		local wb0 = `wb1'
		}
	
	bys loc_work year industry: egen mean2_`w'_100000_inf = sum(`w'_perwt) if wageyr>=100000
	bys loc_work year industry: egen sum_perwt_`w'_100000_inf = sum(perwt) if wageyr>=100000
	replace mean2_`w'_100000_inf = mean2_`w'_100000_inf / sum_perwt_`w'_100000_inf
	bys loc_work year industry: egen mean_`w'_100000_inf = mean(mean2_`w'_100000_inf)

	bys loc_work year industry: egen mean_`w'_all = sum(`w'_perwt)
	bys loc_work year industry: egen sum_perwt_`w'_all = sum(perwt)
	replace mean_`w'_all = mean_`w'_all / sum_perwt_`w'_all	
	
	}
	
	
* ------------------------------------------------------------------------------
* Clean, organize and save

gen fobs = 0
bys loc_work year industry: replace fobs = 1 if _n==1
keep if fobs==1

rename pwstate2 statename
gen statecode = statename

keep loc_work statename statecode pwpuma00 year industry mean_wageyr_* mean_wageyradj_*
order year statename statecode pwpuma00 loc_work industry
sort year statecode pwpuma00 industry

save wagebrackets_wpuma_`incometype'_ind, replace
export delimited using wagebrackets_wpuma_`incometype'_ind.csv, replace

